In [1]:
from time import process_time
from collections import namedtuple
from scipy import stats
from numpy.random import randint
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
def get_sample(sample_max_size = 2 ** 32,
random_size = True, element_range=(0, 2 ** 16), value_type=int):
"""
Generate a pseudo-random sample of size `sample_max_size` if `random_size`
is False, otherwise a pseudo-random size from 0 to `sample_max_size` -1
Each element of the sample falls in the range defined by the `element_range`
param and belongs to the type `value_type`
"""
sample_size = randint(0, sample_max_size) if random_size else sample_max_size
return randint(low=element_range[0], size=sample_size,
high=element_range[1], dtype=value_type)
In [3]:
def clf_function(a, b):
return a > b
In [4]:
def bubble_sort(elements, clf_function):
length = len(elements)
for i in range(length):
swapped = False
for j in range(length - i -1):
if clf_function(elements[j], elements[j+1]):
elements[j], elements[j+1] = elements[j+1], elements[j]
swapped = True
if not swapped:
break
return elements
In [5]:
def performance_test(sample, clf_function):
built_in_time_0 = process_time()
built_in_sorted_list = sorted(sample)
built_in_delta = process_time() - built_in_time_0
comparison_time_0 = process_time()
comparison_sorted_list = bubble_sort(sample, clf_function)
comparison_delta = process_time() - comparison_time_0
Result_test = namedtuple('Test', 'elements, delta_time')
built_in = Result_test(built_in_sorted_list, built_in_delta)
cmp = Result_test(comparison_sorted_list, comparison_delta)
return (built_in, cmp)
In [6]:
def effectiveness_test(pattern, sample, quiet_mode=False):
if any(pattern != sample):
for p, s in zip(pattern, sample):
if p != s and not quiet_mode:
print("{}/{}".format(p, s))
return False
else:
if not quiet_mode:
print("Success")
return True
In [7]:
def run_tests(algorithm, clf_function, sample_size=128, test_size=1024):
errors = 0
built_in_time = []
comparison_time = []
for test in range(test_size):
sample = get_sample(sample_size, random_size=False)
pattern, sample = performance_test(sample, clf_function)
if effectiveness_test(pattern.elements, sample.elements, quiet_mode=True):
built_in_time.append(pattern.delta_time)
comparison_time.append(sample.delta_time)
else:
errors += 1
return (errors, built_in_time, comparison_time)
In [8]:
def comparison_report(pattern, comparison):
pattern_summary = stats.describe(pattern)
comparison_summary = stats.describe(comparison)
print('Built in average time: {:16.8f}\n'
'Comparison average time: {:16.8f}\n'
'Relative mean efficience: {:16.8f}'.
format(
pattern_summary.mean,
comparison_summary.mean,
comparison_summary.mean / pattern_summary.mean
)
)
print('-' * 80)
print('Built in variance time: {:20.12f}\n'
'Comparison variance time: {:20.12f}\n'
'Relative variance: {:20.12f}'.
format(
pattern_summary.variance,
comparison_summary.variance,
comparison_summary.variance / pattern_summary.variance
)
)
return (pattern_summary.mean, comparison_summary.mean)
In [9]:
def plot_pattern_time(pattern_time, test_size, sample_size):
plt.xlabel("{} runs of sample Size: {}".format(test_size, sample_size))
plt.ylabel("$Elapsed \quad Time$")
plt.title("Built in Sort")
plt.plot(
pattern_time, "b{}-".format("o" if test_size < 25 else "")
)
maximal = max(pattern_time)
minimal = min(pattern_time)
plt.plot(
[maximal] * test_size, "{}-".format("o" if test_size < 25 else ""),
label="Max: {:.6f}".format(maximal)
)
plt.plot(
[minimal] * test_size, "{}-".format("o" if test_size < 25 else ""),
label="Min: {:.6f}".format(minimal)
)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.axis([-.1, test_size - .9, 0, maximal * 1.5])
plt.grid(True)
In [10]:
def plot_cmp_time(cmp_time, test_size, sample_size):
plt.xlabel("{} runs of sample Size: {}".format(test_size, sample_size))
plt.ylabel("$Elapsed \quad Time$")
plt.title("Bubble sort")
plt.plot(
cmp_time, "r{}-".format("o" if test_size < 25 else ""),
)
maximal, minimal = max(cmp_time), min(cmp_time)
plt.plot(
[maximal] * test_size, "{}-".format("o" if test_size < 25 else ""),
label="Max: {:.6f}".format(maximal)
)
plt.plot(
[minimal] * test_size, "{}-".format("o" if test_size < 25 else ""),
label="Min: {:.6f}".format(minimal)
)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.axis([-.1, test_size - .9, 0, maximal * 1.5])
plt.grid(True)
In [11]:
def plot_both_times(pattern_time, cmp_time, test_size, sample_size):
plt.xlabel("{} runs of sample Size: {}".format(test_size, sample_size))
plt.ylabel("$Elapsed \quad Time$")
plt.title("Relative performance")
plt.plot(
pattern_time, "b{}-".format("o" if test_size < 25 else ""), label="Built in Sort"
)
plt.plot(
cmp_time, "r{}-".format("o" if test_size < 25 else ""), label="Bubble Sort"
)
plt.legend(loc="upper right")
maximal = max(cmp_time + pattern_time)
plt.axis([-.1, test_size - .9, 0, maximal * 1.5])
plt.grid(True)
In [12]:
def plot_quotient_relation(pattern_timem, cmp_time, test_size, sample_size):
plt.xlabel("{} runs of sample Size: {}".format(test_size, sample_size))
plt.ylabel("$Relative \quad Elapsed \quad Time$")
plt.title("Bubble sort / Built in Sort")
ratios = [bubble / cmp for bubble, cmp in zip(cmp_time, pattern_time)]
plt.plot(ratios, "g{}-".format("o" if test_size < 25 else ""))
maximal = max(ratios)
minimal = min(ratios)
plt.plot(
[maximal] * test_size, "{}-".format("o" if test_size < 25 else ""),
label="Max: {:.6f}".format(maximal)
)
plt.plot(
[minimal] * test_size, "{}-".format("o" if test_size < 25 else ""),
label="Min: {:.6f}".format(minimal)
)
plt.plot(
[1] * test_size, "{}-".format("o" if test_size < 25 else ""), label="1"
)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.axis([-.1, test_size - .9, 0, maximal * 1.5])
plt.grid(True)
In [13]:
def plot_graphics(pattern_time, cmp_time, test_size, sample_size):
# Built in sort method
plt.subplot(221)
plot_pattern_time(pattern_time, test_size, sample_size)
# Bubble sort
plt.subplot(222)
plot_cmp_time(cmp_time, test_size, sample_size)
# Both Bubble and Built in sort
plt.subplot(223)
plot_both_times(pattern_time, cmp_time, test_size, sample_size)
# Ratio between Bubble Sort / Built in Sort
plt.subplot(224)
plot_quotient_relation(pattern_time, cmp_time, test_size, sample_size)
plt.subplots_adjust(top=1.5, bottom=0, left=0.10, right=1.95, hspace=.5, wspace=.75)
plt.show()
In [14]:
def plot_pattern_growth(suite_cases, pattern_time, suite_size):
plt.xlabel("Test suite size".format(len(growth_behaviour)))
plt.ylabel("$Average \quad Elapsed \quad Time$")
plt.title("Average performance per suite")
plt.plot(
suite_cases, pattern_time, "b{}-".
format("o" if suite_size < 25 else ""), label="Built in Sort"
)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.grid(True)
In [15]:
def plot_cmp_growth(suite_cases, cmp_time, suite_size):
plt.xlabel("Test suite size".format(len(growth_behaviour)))
plt.ylabel("$Average \quad Elapsed \quad Time$")
plt.title("Average performance per suite")
plt.plot(
suite_cases, cmp_time, "r{}-".
format("o" if suite_size < 25 else ""), label="Bubble Sort"
)
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.grid(True)
In [16]:
def plot_both_growth_times(suite_cases, pattern_time, cmp_time, suite_size):
plt.xlabel("Test suite size".format(len(growth_behaviour)))
plt.ylabel("$Average \quad Elapsed \quad Time$")
plt.title("Average performance per suite")
plt.plot(
suite_cases, cmp_time, "r{}-".
format("o" if suite_size < 25 else ""), label="Bubble Sort"
)
plt.plot(
suite_cases, pattern_time, "b{}-".
format("o" if suite_size < 25 else ""), label="Built in Sort"
)
plt.legend(loc="upper left")
plt.grid(True)
In [17]:
def plot_growth_graphics(growth_behaviour):
suite_size = len(growth_behaviour)
suite_cases = [entry.sample_size for entry in growth_behaviour]
pattern_time = [entry.pattern_average for entry in growth_behaviour]
cmp_time = [entry.cmp_average for entry in growth_behaviour]
# Built in sort method
plt.subplot(221)
plot_pattern_growth(suite_cases, pattern_time, suite_size)
# Bubble sort
plt.subplot(222)
plot_cmp_growth(suite_cases, cmp_time, suite_size)
# Both Bubble and Built in sort
plt.subplot(223)
plot_both_growth_times(suite_cases, pattern_time, cmp_time, suite_size)
plt.subplots_adjust(top=1.5, bottom=0, left=0.10, right=1.95, hspace=.5, wspace=.75)
plt.show()
In [18]:
AverageTime = namedtuple('AverageTime', 'pattern_average, cmp_average, sample_size')
growth_behaviour = []
In [19]:
sample_size = 1
test_size = 2048
In [20]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [21]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [22]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [23]:
sample_size = 2
test_size = 2048
In [24]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [25]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [26]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [27]:
sample_size = 4
test_size = 2048
In [28]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [29]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [30]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [31]:
sample_size = 8
test_size = 2048
In [32]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [33]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [34]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [35]:
sample_size = 16
test_size = 2048
In [36]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [37]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [38]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [39]:
sample_size = 32
test_size = 2048
In [40]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [41]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [42]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [43]:
sample_size = 64
test_size = 2048
In [44]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [45]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [46]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [47]:
sample_size = 128
test_size = 2048
In [48]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [49]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [50]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [51]:
sample_size = 256
test_size = 2048
In [52]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [53]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [54]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [55]:
sample_size = 512
test_size = 2048
In [56]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [57]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [58]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [59]:
sample_size = 1024
test_size = 2048
In [60]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [61]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [62]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [63]:
sample_size = 2048
test_size = 2048
In [64]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [65]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [66]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [67]:
sample_size = 4096
test_size = 2048
In [68]:
errors, pattern_time, cmp_time = run_tests(
bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)
print("In the end of the day {} error{} occurred in {} runs of size {}".
format(errors, "s" if errors > 1 else "", test_size, sample_size))
In [69]:
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))
In [70]:
plot_graphics(pattern_time, cmp_time, test_size, sample_size)
In [71]:
plot_growth_graphics(growth_behaviour)